import sklearn.metrics
import pandas as pd
import matplotlib.pyplot as plt
from autosklearn.regression import AutoSklearnRegressor

# Settings applied to the AutoSklearnRegressor created in the script entry
# point. Only the options listed here are overridden; everything else keeps
# whatever the installed auto-sklearn version uses by default.
#
# Options deliberately left untouched (values noted alongside are the ones
# recorded when this file was written -- presumably the library defaults,
# verify against the installed release):
#   per_run_time_limit=None, initial_configurations_via_metalearning=25,
#   ensemble_size=50, max_models_on_disc=50, seed=1,
#   resampling_strategy='holdout', resampling_strategy_arguments=None,
#   tmp_folder=None, delete_tmp_folder_after_terminate=True,
#   dask_client=None, disable_evaluator_output=False,
#   get_smac_object_callback=None, smac_scenario_args=None,
#   logging_config=None, metadata_directory=None, metric=None,
#   scoring_functions=None, load_models=True, get_trials_callback=None
AUTO_SKLEARN_CONFIG = dict(
    # Overall budget for the search.
    time_left_for_this_task=60,
    # No restriction on which estimators may be tried.
    include_estimators=None,
    exclude_estimators=None,
    # Keep the ensemble small.
    ensemble_nbest=3,
    # No memory cap.
    memory_limit=None,
    # -1 is the conventional "use every available core" setting.
    n_jobs=-1,
)

# Location and encoding of the input table (GDP / tax features).
DATA_PATH = "/home/igor/zjlab/data/经济预测特征/gdp_tax_data.csv"
DATA_ENCODING = 'gb2312'

# Number of trailing observations kept out of training and used for scoring.
HOLDOUT_SIZE = 3


def add_gdp_lag(data: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *data* with the previous row's GDP added as 'GDP-1'.

    Rows containing any missing value are dropped afterwards; this always
    removes the first row, whose lag is undefined. The input frame is left
    unmodified.
    """
    lagged = data.copy()
    lagged['GDP-1'] = lagged['GDP'].shift(1)
    return lagged.dropna()


def split_features_target(data: pd.DataFrame) -> tuple:
    """Split *data* into ``(X, y)`` arrays.

    ``y`` is the 'GDP' column; ``X`` is every remaining column except
    'time', in the frame's column order.

    Raises:
        KeyError: if the 'time' or 'GDP' column is missing.
    """
    features = data.drop(columns=['time', 'GDP']).values
    target = data['GDP'].values
    return features, target


def main(csv_path: str = DATA_PATH, holdout: int = HOLDOUT_SIZE) -> None:
    """Fit auto-sklearn on all but the last *holdout* rows, then report and plot.

    Prints the search statistics and the selected models, prints the R^2
    score on the held-out tail, and shows a plot of the real series against
    the predictions over the whole range.

    Raises:
        ValueError: if *holdout* is below 1 or leaves no rows for training.
    """
    if holdout < 1:
        # X[:-0] would be an empty slice, so zero must be rejected explicitly.
        raise ValueError('holdout must be at least 1')

    data = add_gdp_lag(pd.read_csv(csv_path, encoding=DATA_ENCODING))
    X, y = split_features_target(data)
    if len(y) <= holdout:
        raise ValueError(
            f'need more than {holdout} usable rows, got {len(y)}'
        )

    # Pass the settings through the constructor instead of calling
    # set_params() on a default-built instance, so that any setup done in
    # __init__ sees the configured values.
    # NOTE(review): some auto-sklearn releases appear to resolve n_jobs at
    # construction time -- confirm for the pinned version.
    automl = AutoSklearnRegressor(**AUTO_SKLEARN_CONFIG)
    automl.fit(X[:-holdout], y[:-holdout])

    # Hint: automl.performance_over_time_.plot(x='Timestamp', kind='line')
    # followed by plt.show() can be used to inspect search progress.
    print(automl.sprint_statistics())
    print(automl.show_models())

    # Predict over the full range for plotting; score only the unseen tail.
    y_hat = automl.predict(X)
    print("R2 score", sklearn.metrics.r2_score(y[-holdout:], y_hat[-holdout:]))

    steps = range(len(y))
    plt.plot(steps, y, label='real')
    plt.plot(steps, y_hat, label='pred')
    plt.legend()
    plt.show()


if __name__ == '__main__':
    main()
